1. Problem 1: Highlights from Homework 1

1.1. Problem 2 from Homework 1: Simple Gibbs Sampling

library(shiny)
library(shinydashboard)

# Shiny dashboard: visualize Gibbs sampling from a bivariate normal with
# unit variances and user-chosen correlation rho.
ui<-dashboardPage(
  dashboardHeader(title="2D Guassian"),
  dashboardSidebar(
    sliderInput(inputId = "rho",
                label="Choose Correlation",value = 0.5, min = 0, max = 0.99,step=0.01),
    sliderInput(inputId = "niter",
                label = "Size of Sample",value=100,min=100,max=10000)),
  dashboardBody(plotOutput("plot1"))
)

server<-function(input,output){
  output$plot1<-renderPlot({
    # Gibbs sampler: each full conditional of a standard bivariate normal
    # with correlation rho is N(rho * other coordinate, 1 - rho^2).
    rho = input$rho
    iter = as.integer(input$niter)
    x = c(-5, 5)                 # arbitrary starting state
    xx = matrix(NA, iter, 2)
    for(i in 1:iter) {
      x[1] = rnorm(1, mean = rho * x[2],  sd = sqrt(1-rho^2))
      x[2] = rnorm(1, mean = rho * x[1],  sd = sqrt(1-rho^2))
      xx[i, ] = x
    }
    # The draft first drew an empty plot and added each point inside the
    # loop, then immediately replaced it with the plot below; only the
    # final plot is ever displayed by renderPlot, so the intermediate
    # plotting is dropped.
    plot(xx[,1],xx[,2],col="blue",
         cex=0.5,pch=20,xlab="x",ylab="y",xlim=c(-5,5),ylim=c(-5,5))
  })
}

shinyApp(ui = ui, server = server)

As we can see from the plot (note: install the shiny package and run the code above, or visit https://kaustzp.shinyapps.io/hw12zp/ to see it), as \(\rho\) increases from 0 toward 1, the linear association between \(x\) and \(y\) strengthens: the correlation between \(x\) and \(y\) reflects the degree of linearity between them.

1.2. Problem 4 from Homework 1: Adaptive Rejection Sampling

# Intersection points of adjacent tangent lines to the log-density.
# For consecutive abscissae x_j < x_{j+1}, the tangents at the two points
# cross at
#   z_j = (h(x_{j+1}) - h(x_j) - x_{j+1} h'(x_{j+1}) + x_j h'(x_j))
#         / (h'(x_j) - h'(x_{j+1}))
# Relies on grad.new(), defined elsewhere in this file, for h'.
x.inter <- function(x.sample, logends) {
  m <- length(x.sample) - 1
  z.inter <- numeric(m)
  for (idx in seq_len(m)) {
    xl <- x.sample[idx]
    xr <- x.sample[idx + 1]
    gl <- grad.new(logends, xl)
    gr <- grad.new(logends, xr)
    z.inter[idx] <- (logends(xr) - logends(xl) - xr * gr + xl * gl) / (gl - gr)
  }
  z.inter
}

# Piecewise-linear upper bound (upper hull) of the concave log-density
# `logends`, evaluated at each element of the vector x.  The hull is the
# set of tangent lines at the abscissae x.sample; x.in (from x.inter)
# holds the break points where adjacent tangents intersect.  Relies on
# grad.new() defined elsewhere in this file.
upbd <- function(x,x.sample,logends){
  x.in<-x.inter(x.sample,logends);
  y=c(0);
  for(j in 1:length(x)){
    # left of the first intersection: tangent at the first abscissa
    if( x[j] <= x.in[1]){
      y[j] = logends(x.sample[1])+
      (x[j]-x.sample[1])*grad.new(logends,x.sample[1])
    }
    # right of the last intersection: tangent at the last abscissa
    if(x[j] > x.in[length(x.in)]){
      y[j] = logends(x.sample[length(x.sample)])+
      (x[j]-x.sample[length(x.sample)])*grad.new(logends,x.sample[length(x.sample)])
    }
    
    # interior piece (x.in[i], x.in[i+1]]: tangent at x.sample[i+1]
    if(length(x.in) >= 2){
      for(i in 1:(length(x.in)-1)){
        if(x.in[i] < x[j] && x[j] <= x.in[i+1]){
          y[j] = logends(x.sample[i+1])+
          (x[j]-x.sample[i+1])*grad.new(logends,x.sample[i+1])
        }
      }
    }
  } 
  return(y)
}

# Unnormalized CDF of the piecewise-exponential upper bound, evaluated at
# each element of x.  x.in partitions [x.lower, x.upper] at the tangent
# intersection points; on piece j the bound is
# exp(val + dre*(u - x.sample[j])), which integrates in closed form.
# Fix vs. the draft: when the tangent slope dre is exactly 0 the piece is
# flat and the closed form divides by zero, so integrate exp(val)*width
# instead — the copy of this function nested inside update.lambda already
# carried this guard; the top-level version here did not.
cdf.upbd <- function(x,x.sample,logends,x.lower,x.upper){
  x.in=c(x.lower,x.inter(x.sample,logends),x.upper);
  y=rep(0,length(x));
  for(i in 1:length(x)){
    posi=findInterval(x[i],x.in)
    # full mass of every piece strictly left of the one containing x[i]
    if(posi>1){
      for(j in 1:(posi-1)){
        val=logends(x.sample[j]);
        dre=grad.new(logends,x.sample[j]);
        if(dre!=0){
          y[i]=y[i]+exp(val-x.sample[j]*dre)/dre*(exp(x.in[j+1]*dre)-exp(x.in[j]*dre))
        }else{
          y[i]=y[i]+exp(val)*(x.in[j+1]-x.in[j])
        }
      }
    }
    if(posi==1){
      y[i]=0;
    }
    # partial mass within the piece containing x[i]
    if(posi!=length(x.in) && posi!=1){
      val=logends(x.sample[posi]);
      dre=grad.new(logends,x.sample[posi]);
      if(dre!=0){
        y[i]=y[i]+exp(val-x.sample[posi]*dre)/dre*(exp(x[i]*dre)-exp(x.in[posi]*dre));
      }else{
        y[i]=y[i]+exp(val)*(x[i]-x.in[posi])
      }
    }
  }
  return(y)
}
# Draw one sample from the normalized piecewise-exponential upper bound
# by inverse-CDF sampling: pick a uniform fraction of the total mass,
# locate its piece with findInterval, then invert the closed-form
# integral of exp(val + dre*(u - x.sample[posi])) on that piece.  The
# dre == 0 branch inverts the flat (constant-height) piece instead.
sample.upbd <- function(x.sample,logends,x.lower,x.upper){
  x.in=c(x.lower,x.inter(x.sample,logends),x.upper);
  I = cdf.upbd(x.in,x.sample,logends,x.lower,x.upper);
  unif = runif(1)*I[length(I)];     # uniform point of the total mass
  posi=findInterval(unif,I);        # which piece it falls in
  res=unif-I[posi]                  # residual mass within that piece
  dre=grad.new(logends,x.sample[posi]);
  val=logends(x.sample[posi]);
  if(dre!=0){
    y1=dre*res*exp(x.sample[posi]*dre-val)+exp(x.in[posi]*dre)
    y2=log(y1)/dre;
  }else{
    y2=res*exp(-val)+x.in[posi]
  }
  return(y2)
}

#sampling step
# Adaptive rejection sampler: repeatedly draw a candidate `a` from the
# current upper bound and accept it with probability
# exp(logends(a) - upbd(a)).  While the abscissa set is still small
# (<= sqrt(n)) each accepted point is also added to x.init, tightening
# the hull.  Returns the n accepted values, excluding the (possibly
# grown) abscissa set used to build the hull.
my.ars <- function(x.init,logends,x.lower,x.upper,n) {
  x.sample<- x.init;
  m=length(x.init);
  # stop once n accepted points sit alongside the m initial abscissae
  while (length(x.sample) <= n+m-1)  {
    a = sample.upbd(x.init,logends,x.lower,x.upper)
    b = exp(logends(a)-upbd(a,x.init,logends))
    if(runif(1) < b && a>=x.lower &&a<=x.upper) {
      x.sample[length(x.sample)+1] <- a
      if(length(x.init)<=n^(1/2)){
        x.init[length(x.init)+1]<-a
        x.init<-sort(x.init, decreasing = FALSE)
      }
    }
    x.sample <- sort(x.sample, decreasing = FALSE)
  }
  # drop the hull abscissae; what remains are the accepted draws
  return(x.sample[!x.sample %in% x.init])
}
#Having Fun with ARS
# Analytic derivative of logends2: d/dx [142*log(x) - 57*x] = 142/x - 57.
# The logends argument is accepted only for interface compatibility with
# the ARS helpers; it is not used.
grad.new <- function(logends, x) {
  142 / x - 57
}
# Target log-density (up to an additive constant): the Gamma-type kernel
# h(x) = 142*log(x) - 57*x, which is log-concave on (0, Inf).
logends2 <- function(x) {
  142 * log(x) - 57 * x
}
# Draw 1000 ARS samples from logends2 and overlay a Gamma density.
# NOTE(review): logends2(x) = 142*log(x) - 57*x corresponds to a
# Gamma(shape = 143, rate = 57) kernel (exponent = shape - 1), while the
# overlay below uses dgamma(..., 141, 57) -- confirm which is intended.
par(mfrow=c(2,1))
nor2=my.ars(c(1,3),logends2,0,Inf,1000)
plot(density(nor2),xlab ="Values of Random Variables"
,ylab="Density",main = "Comparision of Sample and Its Distribution: Gamma Distribution")
points(seq(min(nor2),max(nor2),0.001),
dgamma(seq(min(nor2),max(nor2),0.001),141,57),pch=".",col="red")

# Log-density kernel of the standard normal: h(x) = -x^2 / 2.
logends1 <- function(x) {
  -(x^2) / 2
}
# Analytic derivative of logends1: d/dx [-x^2/2] = -x.  The logends
# argument is unused; kept for interface compatibility with the ARS code.
grad.new <- function(logends, x) {
  -x
}
# Draw 1000 ARS samples from the standard-normal kernel and overlay dnorm.
nor1=my.ars(c(-5,5),logends1,-Inf,Inf,1000)
plot(density(nor1),xlab ="Values of Random Variables",
ylab="Density",main = "Comparision of Sample and Its Distribution: Standard Guassian")
points(seq(min(nor1),max(nor1),0.001),
dnorm(seq(min(nor1),max(nor1),0.001)),pch=".",col="red")

Because \(numDeriv::grad\) sometimes fails — when the density involves a \(\log\) term, the numerical gradient is unstable near \(0\) — and since we have the explicit log-density, we can use its explicit first derivative, \(grad.new\), instead. This turns out to work efficiently.

2. Problem 2: Highlights from Homework 2

2.1. Problem 2 from Homework 2: Uniform

2.1.1.

Since \(x^{'}=fx\), and \(f\sim Unif(1/F,F)\), the conditional \(pdf\) is, \[P[x^{'}|x]=P_{f}(\frac{x^{'}}{x})\frac{1}{x}\] where \(P_f(f)\) is the \(pdf\) of \(f\). Therefore, the acceptance probability for the MH algorithm is \(min\{1,\frac{x}{x^{'}}\}\)

#Bayesian HW2.2.1
# Metropolis-Hastings sampler for a Uniform(0,1) target using the
# multiplicative proposal x' = f*x with f ~ Unif(1/d, d), so
# x' | x ~ Unif(x/d, x*d) with density 1/(x*(d - 1/d)).  The Hastings
# ratio is then x/x', truncated by the indicator that x' stays in (0,1].
#
# d: proposal spread factor (d > 1); n: number of MH iterations.
# Returns the chain of length n + 1, starting at 0.5.
unif1 <- function(d, n) {
  # Preallocate the chain instead of growing it element by element (the
  # draft also carried a dead `j = 1` before the loop).
  x <- numeric(n + 1)
  x[1] <- 0.5
  for (j in seq_len(n)) {
    xp <- runif(1, x[j] / d, x[j] * d)
    prob <- min(1, x[j] / xp * as.numeric(xp <= 1))
    if (runif(1) < prob) {
      x[j + 1] <- xp
    } else {
      x[j + 1] <- x[j]
    }
  }
  x
}
# Density of the simulated chain against the Uniform(0,1) target (red line).
plot(density(unif1(3,10000)),xlim=c(0,1),xlab="Value of X",
  ylab="Density of X",main="Plot of density for the simulation")
abline(h=1,col="red")

2.1.2.

Since the prior for \(x\) is uniform, the acceptance probability in the MH algorithm is \[min\{1,\frac{P[x|x^{'}]}{P[x^{'}|x]}=\frac{P_{f}(\frac{x}{x^{'}})\frac{1}{x^{'}}}{P_{f}(\frac{x^{'}}{x})\frac{1}{x}}\}\] Therefore, \(\frac{P[x|x^{'}]}{P[x^{'}|x]}=\frac{P_{f}(\frac{x}{x^{'}})\frac{1}{x^{'}}}{P_{f}(\frac{x^{'}}{x})\frac{1}{x}}\geq 1\) when \(0<x^{'}<1\). Then \(P_f(u)\,u\geq P_f(\frac{1}{u})\), where \(u=\frac{x}{x^{'}}\). Thus, I find that \(P_f(f)\propto \frac{1}{\sqrt{f}}\) satisfies this condition. By using this \(pdf\) of \(f\), we can get good simulation results.

Since the acceptance probability is 1 when \(0<x^{'}<1\), we do not need to draw an additional uniform sample to carry out the accept/reject evaluation. In other words, the sampling process does not rely on another uniform variable, which makes the algorithm faster. We prefer sampling from \(p_f(f)\) instead of \(Unif\{\frac{1}{F},F\}\).

2.1.3.

#Bayesian HW2.2.2
#uPf(u)<=Pf(1/u) u=x'/x PF(u)=1/sqrt(u)
# MH sampler whose proposal density p_f(f) proportional to 1/sqrt(f)
# makes the acceptance probability exactly I(0 < x' < 1).
#
# d: proposal spread factor (d > 1); n: number of iterations.
# Returns the chain of length n + 1, starting at 0.5.
unif2 <- function(d, n) {
  x <- numeric(n + 1)  # preallocated chain (draft grew it, plus a dead `j = 1`)
  x[1] <- 0.5
  for (j in seq_len(n)) {
    # Inverse-CDF draw from the 1/sqrt(f) proposal, rescaled by x[j]:
    # the square of a uniform point on [sqrt(x/d), sqrt(x*d)].
    u <- ((runif(1, min = 1/d, max = d) - 1/d) / (d - 1/d) * sqrt(x[j]) *
            (sqrt(d) - 1/sqrt(d)) + sqrt(x[j]) * 1/sqrt(d))^2
    # Acceptance probability is the indicator I(u < 1), so no auxiliary
    # uniform is needed -- the draft drew runif(1) and compared it with
    # 0/1, which always accepted/rejected anyway.
    if (u < 1) {
      x[j + 1] <- u
    } else {
      x[j + 1] <- x[j]
    }
  }
  x
}
# Density of the simulated chain against the Uniform(0,1) target (red line).
plot(density(unif2(3,10000)),xlim=c(0,1),xlab="Value of X",
ylab="Density of X",main="Plot of density for the simulation")
abline(h=1,col="red")

The acceptance probability for this MH algorithm is \[I(0<x^{'}<1)\], where \(I(x)\) is the indicator function.

2.2. Problem 3 from Homework 2: Change-point model

I restricted the domain of the change point and used the ARS algorithm to sample the Poisson rates.

# Coal-mining disasters: counts per year from 1851 onward.  The red
# segment marks the window (years 1886..1947) in which the change point
# is assumed to lie.
library(boot)
data(coal)
y<-tabulate(floor(coal[[1]]))  # tabulate() indexes counts by year
y<-y[1851:length(y)]
n<-length(y)
par(mfrow=c(1,2))
plot(c(1851:(1851+length(y)-1)),
y,xlab = "Years",ylab = "Frequency of Disasters",type="h");
lines(c(1851+35,1851+96),c(4,4),col="red")

# Draw one value of a Poisson rate from its Gamma full conditional,
# Gamma(prior$a + sum(yy), prior$b + length(yy)), using the ARS
# machinery.  The ARS helpers are re-defined locally so that the
# locally defined logends/grad.new (closing over shape and rate) are
# the ones they resolve; this copy of cdf.upbd also guards dre == 0.
update.lambda = function(yy, prior){
  

  #obtain the intersection points
  x.inter <- function(x.sample,logends){
    z.inter=c(0);
    for(j in 1:(length(x.sample)-1) ){
      z.inter[j] <- (logends(x.sample[j+1])-logends(x.sample[j])-
      x.sample[j+1]*grad.new(logends,x.sample[j+1])+
     x.sample[j]*grad.new(logends,x.sample[j]))/(grad.new(logends,x.sample[j])-
      grad.new(logends,x.sample[j+1]))

    }
    return(z.inter)
  }

  # obtain the upperbound function of logdens, which is concave.
  upbd <- function(x,x.sample,logends){
    x.in<-x.inter(x.sample,logends);
    y=c(0);
    for(j in 1:length(x)){
      if( x[j] <= x.in[1]){
        y[j] = logends(x.sample[1])+(x[j]-x.sample[1])*grad.new(logends,x.sample[1])
      }
      if(x[j] > x.in[length(x.in)]){
        y[j] = logends(x.sample[length(x.sample)])+
        (x[j]-x.sample[length(x.sample)])*
          grad.new(logends,x.sample[length(x.sample)])
      }
      
      if(length(x.in) >= 2){
        for(i in 1:(length(x.in)-1)){
          if(x.in[i] < x[j] && x[j] <= x.in[i+1]){
            y[j] = logends(x.sample[i+1])+
            (x[j]-x.sample[i+1])*grad.new(logends,x.sample[i+1])
          }
        }
      }
    } 
    return(y)
  }

  #obtain the CDF of Upperbound function
  # (unlike the top-level copy, this one handles a zero tangent slope)
  cdf.upbd <- function(x,x.sample,logends,x.lower,x.upper){
    x.in=c(x.lower,x.inter(x.sample,logends),x.upper);
    y=rep(0,length(x));
    for(i in 1:length(x)){
      posi=findInterval(x[i],x.in)
      if(posi>1){
        for(j in 1:(posi-1)){
          val=logends(x.sample[j]);
          dre=grad.new(logends,x.sample[j]);
          if(dre!=0){
          y[i]=y[i]+exp(val-x.sample[j]*dre)/dre*(exp(x.in[j+1]*dre)-exp(x.in[j]*dre))
          }else{
            y[i]=y[i]+exp(val)*(x.in[j+1]-x.in[j])
          }
        }
      }
      if(posi==1){
        y[i]=0;
      }
      if(posi!=length(x.in) && posi!=1){
        val=logends(x.sample[posi]);
        dre=grad.new(logends,x.sample[posi]);
        if(dre!=0){
        y[i]=y[i]+exp(val-x.sample[posi]*dre)/dre*(exp(x[i]*dre)-exp(x.in[posi]*dre));
        }else{
          y[i]=y[i]+exp(val)*(x[i]-x.in[posi])
        }
      }
    }
    return(y)
  }
  # sampling from the upperbound (inverse-CDF within the located piece)
  sample.upbd <- function(x.sample,logends,x.lower,x.upper){
    x.in=c(x.lower,x.inter(x.sample,logends),x.upper);
    I = cdf.upbd(x.in,x.sample,logends,x.lower,x.upper);
    unif = runif(1)*I[length(I)];
    posi=findInterval(unif,I);
    res=unif-I[posi]
    dre=grad.new(logends,x.sample[posi]);
    val=logends(x.sample[posi]);
    if(dre!=0){
    y1=dre*res*exp(x.sample[posi]*dre-val)+exp(x.in[posi]*dre)
    y2=log(y1)/dre;
    }else{
      y2=res*exp(-val)+x.in[posi]
    }
    return(y2)
  }

  #sampling step (accept/reject against the hull; see my.ars above)
  my.ars <- function(x.init,logends,x.lower,x.upper,n) {
    x.sample<- x.init;
    m=length(x.init);
    while (length(x.sample) <= n+m-1)  {
      a = sample.upbd(x.init,logends,x.lower,x.upper)
      b = exp(logends(a)-upbd(a,x.init,logends))
      if(runif(1) < b && a>=x.lower &&a<=x.upper) {
        x.sample[length(x.sample)+1] <- a
        if(length(x.init)<=n^(1/2)){
          x.init[length(x.init)+1]<-a
          x.init<-sort(x.init, decreasing = FALSE)
        }
      }
      x.sample <- sort(x.sample, decreasing = FALSE)
    }
    return(x.sample[!x.sample %in% x.init])
  }
  # Gamma posterior parameters from the Poisson likelihood
  y.sum = sum(yy)
  y.len = length(yy)
  shape = prior$a + y.sum
  rate = prior$b + y.len
  #define the logend function of Gamma distribution
  logends<-function(x){
   return((shape-1)*log(x)-rate*x)
  }
  #Due to the failure of grad function
  #when there is log, we should define the grad specifically.
  grad.new<-function(logends,x){
    return((shape-1)/x-rate)
  }
  # initial abscissae bracket the posterior mode; draw a single sample
  return (my.ars(c((shape-min(5,floor(shape/2))):(shape+5))/rate,logends,0,Inf,1))
}

# Draw the change point k from its full conditional given the two
# Poisson rates: P(k) is proportional to
# prod_{i<=k} Pois(yy_i | lam.L) * prod_{i>k} Pois(yy_i | lam.R),
# restricted to k in n.low..n.high (defaults keep the original window).
# Fixes vs. the draft: use the yy argument (the draft read the global y
# and silently ignored its input) and subtract max(log.p) before
# exponentiating so the weights cannot all underflow to zero.
update.changepoint = function(yy, lam.L, lam.R, n.low = 36, n.high = 97){
  log.p = numeric(n.high - n.low + 1)  # work in logs: products overflow/underflow
  for (k in n.low:n.high){
    log.p[k - n.low + 1] = sum(dpois(yy[1:k], lam.L, log = TRUE)) +
      sum(dpois(yy[-(1:k)], lam.R, log = TRUE))
  }
  log.p = log.p - max(log.p)  # numerically safe; sample() renormalizes prob
  p = exp(log.p)
  return (sample(n.low:n.high, size = 1, prob = p))
}

# Gibbs driver: alternate ARS draws of the two Poisson rates (via
# update.lambda) with draws of the change point k; record the k chain.
prior = list(a=1,b=1)
lambda.L = 1
lambda.R = 1
k = floor(n/2)

vec <- numeric(10000)
for (iter in 1:10000){
  lambda.L = update.lambda(y[1:k],prior)
  lambda.R = update.lambda(y[-(1:k)],prior)
  k = update.changepoint(y, lambda.L, lambda.R)
  vec[iter] = k
}

hist(vec,xlab="Change-Point",ylab="Frequency",main="Change-Point Histogram")

Here, I assume the change point lies between the red lines in the first plot, which is my guess by eye. Using the Adaptive Rejection Sampling algorithm, we obtain a result very similar to the one from the previous lecture.

3. Problem 3: Using JAGS

3.1. Using JAGS for one-change-point case

library(boot)
library(runjags)

# One-change-point JAGS model: cp has a discrete-uniform prior over
# 1..n-1 (dcat with equal weights), the two Poisson rates have
# Gamma(1,1) priors, and each year's count is Poisson with the rate on
# its side of cp.
changepoint1<-"model{
  cp ~ dcat(rep(1,n-1))
  lambda.L ~ dgamma(1,1)
  lambda.R ~ dgamma(1,1)
  for(i in 1:n){
    lambda[i]<-ifelse(i<=cp,lambda.L,lambda.R)
    y[i] ~ dpois(lambda[i])
  }
}"
data(coal)
y <- tabulate(floor(coal[[1]]))
y <- y[1851:length(y)]
n <- length(y)
r = run.jags(model=changepoint1,
monitor = c("cp","lambda.L","lambda.R"),data=list(y=y,n=n))
## Warning: No initial value blocks found and n.chains not specified: 2 chains
## were used
## Loading required namespace: rjags
## Warning: No initial values were provided - JAGS will use the same initial
## values for all chains
## Compiling rjags model...
## Calling the simulation using the rjags method...
## Adapting the model for 1000 iterations...
## Burning in the model for 4000 iterations...
## Running the model for 10000 iterations...
## Simulation complete
## Calculating summary statistics...
## Calculating the Gelman-Rubin statistic for 3 variables....
## Finished running the simulation
summary(r)
##             Lower95     Median   Upper95       Mean        SD Mode
## cp       35.0000000 40.0000000 45.000000 40.0539000 2.4143294   41
## lambda.L  2.5199835  3.0552780  3.609130  3.0650930 0.2812544   NA
## lambda.R  0.7004575  0.9176639  1.151082  0.9222093 0.1154052   NA
##                MCerr MC%ofSD SSeff         AC.10      psrf
## cp       0.019328864     0.8 15602  0.0072526338 1.0006424
## lambda.L 0.002648744     0.9 11275 -0.0033517631 0.9999713
## lambda.R 0.001113414     1.0 10743  0.0007995874 1.0000459
plot(r)
## Generating plots...

It looks the same as what we obtained in the previous question.

3.2. Using JAGS for two-change-point case

library(boot)
library(runjags)
data(coal)
# Two-change-point JAGS model with i.i.d. priors on cp1 and cp2.  Note
# that max(cp1,cp2)-abs(cp1-cp2) equals min(cp1,cp2) and
# abs(cp1-cp2)+min(cp1,cp2) equals max(cp1,cp2), so lambda.L applies up
# to the smaller change point, lambda.M between the two, and lambda.R
# after the larger one.
changepoint2<-"model{
  cp1 ~ dcat(rep(1,n-1))
  cp2 ~ dcat(rep(1,n-1))
  lambda.L ~ dgamma(1,1)
  lambda.R ~ dgamma(1,1)
  lambda.M ~ dgamma(1,1)
  for(i in 1:n){
    lambda[i]<-ifelse(i<=max(cp1,cp2)-abs(cp1-cp2),
lambda.L,ifelse(i<=abs(cp1-cp2)+min(cp1,cp2),lambda.M,lambda.R))
    y[i] ~ dpois(lambda[i])
  }
}"
y <- tabulate(floor(coal[[1]]))
y <- y[1851:length(y)]
n <- length(y)
r = run.jags(model=changepoint2,monitor = 
c("cp1","cp2","lambda.L","lambda.M","lambda.R"),data=list(y=y,n=n))
## Warning: No initial value blocks found and n.chains not specified: 2 chains
## were used
## Warning: No initial values were provided - JAGS will use the same initial
## values for all chains
## Compiling rjags model...
## Calling the simulation using the rjags method...
## Adapting the model for 1000 iterations...
## Burning in the model for 4000 iterations...
## Running the model for 10000 iterations...
## Simulation complete
## Calculating summary statistics...
## Calculating the Gelman-Rubin statistic for 5 variables....
## Finished running the simulation
summary(r)
##              Lower95     Median     Upper95       Mean         SD Mode
## cp1      35.00000000 46.0000000 102.0000000 66.6548500 29.1154187   97
## cp2      35.00000000 44.0000000 102.0000000 66.2337000 29.1041807   97
## lambda.L  2.53940079  3.0742745   3.6506799  3.0844584  0.2867141   NA
## lambda.M  0.72239772  1.0763072   1.4546339  1.1105444  0.2870315   NA
## lambda.R  0.07417152  0.3553162   0.9358108  0.4155581  0.2429945   NA
##                MCerr MC%ofSD SSeff       AC.10     psrf
## cp1      2.411547769     8.3   146 0.825870905 1.001238
## cp2      2.317857661     8.0   158 0.830520970 1.003176
## lambda.L 0.002772790     1.0 10692 0.003803706 1.000096
## lambda.M 0.011195700     3.9   657 0.464058036 1.008497
## lambda.R 0.005656295     2.3  1846 0.174742348 1.000108
plot(r)
## Generating plots...

Discuss

Actually, here I set two change points whose priors are independent and identically distributed, namely \(k_1\) and \(k_2\); this exchangeability is why each change point's posterior plot shows two peaks. To determine which Poisson mean to use at year \(i\), I use \(\lambda_L\) for \(i\le\min(k_1,k_2)\), \(\lambda_M\) for \(\min(k_1,k_2)<i\le\max(k_1,k_2)\), and \(\lambda_R\) otherwise (this is what the \(\max-|\cdot|\) arithmetic in the model encodes).

Actually, here I set two change-point whose prior is independent and identical distributed. So there are two peaks in plot of each change point.

4.Problem 4: Microarray data

Assume the \(\nu_g\)’s to be normal distributed with mean \(\mu\)’s and precision \(\rho\), the \(\Delta_g\)’s to be normal distributed with mean \(m\) and precision \(r\), and the \(\tau_g\)’s to be gamma distributed with mean \(\alpha\) and variance \(\beta\).

\[\begin{align*} &\ \ \ P[\nu_g,\Delta_g,\tau_g,x_{g_1},\ldots,x_{g_{S_{1}}},y_{g_1},\ldots,y_{g_{S_2}}] \\ &\propto\tau_g^{\frac{S_1+S_2}{2}+\frac{\alpha^2}{\beta}-1}\cdot\exp\{-\frac{\sum^{S_1}_{i=1}\tau_g(x_{g_i}-\nu_g-\Delta_g)^2+\sum^{S_2}_{i=1}\tau_g(y_{g_{i}}-\nu_g+\Delta_g)^2+\rho(\nu_g-\mu)^2+r(\Delta_g-m)^2}{2}-\frac{\alpha\tau_g}{\beta}\} \end{align*}\]

1.

If \(\nu_g\) has a normal prior distribution as in the setting above, then the conditional distribution for \(\nu_g\) is proportional to the distribution above.

\[\nu_g|ALL\sim N(\frac{\tau_g(\sum_{i=1}^{S_1}x_{g_i}+\sum_{i=i}^{S_2}y_{g_i}+(S_2-S_1)\Delta_g)+\rho\mu}{\tau_g(S_1+S_2)+\rho},\frac{1}{\tau_g(S_1+S_2)+\rho})\]

Thus,the normal distribution is the conjugate distribution for \(\nu_g\).

2.

If \(\Delta_g\) has a normal prior distribution as in the setting above, then the conditional distribution for \(\Delta_g\) is proportional to the distribution above.

\[\Delta_g|ALL\sim N(\frac{\tau_g(\sum^{S_1}_{i=1}x_{g_i}-\sum^{S_2}_{i=i}y_{g_i}+(S_2-S_1)\nu_g)+rm}{\tau_g(S_1+S_2)+r},\frac{1}{\tau_g(S_1+S_2)+r})\]

Thus, the normal distribution is the conjugate distribution for \(\Delta_g\).

If \(\tau_g\) follows a gamma distribution as in the setting above, then the conditional distribution for \(\tau_g\) is proportional to the distribution above.

\[\tau_g|ALL \sim Gamma(\frac{S_1+S_2}{2}+\frac{\alpha^2}{\beta},\frac{\alpha}{\beta}+\frac{\sum^{S_1}_{i=1}(x_{g_i}-\nu_g-\Delta_g)^2+\sum^{S_2}_{i=1}(y_{g_{i}}-\nu_g+\Delta_g)^2}{2})\]

3.

Now, we assume that \(\mu=m=0,\rho=r=\alpha=\beta=\frac{1}{100}\). The graphical model is as follows.

library(DiagrammeR)

# Graphical model for one gene: observed X_gi, Y_gi (boxes) depend on
# the latent Nu_g, Delta_g, Tau_g (circles).
grViz("
      digraph boxes_and_circles{
      
      #add node statement
      node[shape=box]
      X_gi;Y_gi
      
      node[shape=circle]
      Delta_g;Tau_g;Nu_g
      #add edge statement
      Delta_g->X_gi;Delta_g->Y_gi;Tau_g->X_gi;Tau_g->Y_gi;Nu_g->X_gi;Nu_g->Y_gi
      }
      ")
# Expression measurements: x* = first condition, y* = second condition;
# one gene per row (as indexed by the Gibbs loop below).
x1<-read.table("x1.txt")
x2<-read.table("x2.txt")
y1<-read.table("y1.txt")
y2<-read.table("y2.txt")

#Question 3
#initialize the parameters
r=3 #number of genes (rows) to run per dataset
n=10000# number of Gibbs draws per chain

u1=0 #prior mean of Nu (mu)
u2=0#prior mean of Delta (m)
t1=1/100# prior precision of Nu (rho; used with u1 in the nu update)
t2=1/100# prior precision of Delta (r; used with u2 in the delta update)
alpha=1/100#shape of the Gamma prior on tau
beta=1/100#rate of the Gamma prior on tau

# Chains: row 1 holds the initial state, rows 2..n+1 hold the draws.
v=list(dataset1=matrix(0,n+1,r),dataset2=matrix(0,n+1,r))
v[[1]][1,]=rep(0,r)
v[[2]][1,]=rep(0,r)
delta=list(dataset1=matrix(0,n+1,r),dataset2=matrix(0,n+1,r))
delta[[1]][1,]=rep(0,r)
delta[[2]][1,]=rep(0,r)
t=list(dataset1=matrix(0,n+1,r),dataset2=matrix(0,n+1,r))
t[[1]][1,]=rep(1,r)
t[[2]][1,]=rep(1,r)

x<-list(x1=x1,x2=x2);
y<-list(y1=y1,y2=y2);
#gibbs sampling
# Gibbs sampler for (nu_g, Delta_g, tau_g) per gene (Question 3).
# Fixes vs. the draft:
#  * sufficient statistics are computed per gene i (the draft summed
#    row 1 of x and y for every gene, and took y_length from x)
#  * `for(k in 2:(n+1))` -- the draft's `2:n+1` parses as `(2:n)+1`,
#    which skipped k = 2 and left row 2 of every chain at its initial 0
#  * the Gamma full-conditional rate uses SS/2, matching the derived
#    posterior stated in the text
for(j in 1:2){# two files to sample
  for(i in 1:r){ #r genes to implement from each dataset
    x_sum=sum(x[[j]][i,])
    y_sum=sum(y[[j]][i,])
    x_length=length(x[[j]][i,])
    y_length=length(y[[j]][i,])
    for(k in 2:(n+1)){# n samples to get
      # nu_g | rest ~ N(b1/a1, 1/a1)
      b1=t[[j]][k-1,i]*(x_sum-x_length*delta[[j]][k-1,i]+y_sum+y_length*delta[[j]][k-1,i])+t1*u1
      a1=t[[j]][k-1,i]*(x_length+y_length)+t1
      v[[j]][k,i]=rnorm(1,b1/a1, 1/sqrt(a1))

      # Delta_g | rest ~ N(b2/a2, 1/a2)
      b2=t[[j]][k-1,i]*(x_sum - x_length*v[[j]][k,i]-y_sum +y_length*v[[j]][k,i])+t2*u2
      a2=t[[j]][k-1,i]*(x_length+y_length)+t2
      delta[[j]][k,i]=rnorm(1,b2/a2, 1/sqrt(a2))

      # tau_g | rest ~ Gamma(alpha + (S1+S2)/2, beta + SS/2)
      ss=sum((x[[j]][i,]-v[[j]][k,i]-delta[[j]][k,i])^2)+
        sum((y[[j]][i,]-v[[j]][k,i]+delta[[j]][k,i])^2)
      t[[j]][k,i]=rgamma(1,shape=(x_length+y_length)/2+alpha,rate=beta+ss/2)
    }
  }
}
# Trace plots for each gene in each dataset.  (The "Covergence" spelling
# lives inside runtime title strings and is left untouched.)
for(j in 1:2){
  for(i in 1:r){
par(mfrow=c(3,1))
plot(v[[j]][,i],type="l",xlab="Iteration",ylab="Value of Nu",
main=paste("Covergence of Nu in Gene",i,"for",ifelse(j==1,"First Data","Second Data")))
plot(t[[j]][,i],type="l",xlab="Iteration",ylab="Value of Tau",
main=paste("Covergence of Tau in Gene",i,"for",ifelse(j==1,"First Data","Second Data")))
plot(delta[[j]][,i],type="l",xlab="Iteration",ylab="Value of Delta",
main=paste("Covergence of Delta in Gene",i,"for",ifelse(j==1,"First Data","Second Data")))
  }
}

From the plots, we conclude that these parameters have converged. Since we assumed \(\Delta_g\) has prior mean 0 and variance 100, according to these plots we conclude that none of these genes is differentially expressed at any significance level.

4.

\[\begin{align*} &\ \prod_gP[\nu_g,\Delta_g,\tau_g,\mu,m,\rho,r,\alpha,\beta,x_{g_1},\ldots,x_{g_{S_{1}}},y_{g_1},\ldots,y_{g_{S_2}}] \\ &\propto\frac{1}{\rho r}\prod_g\rho^{\frac{1}{2}}r^{\frac{1}{2}}\tau_g^{\frac{S_1+S_2}{2}+\frac{\alpha^2}{\beta}-1}\cdot\exp\{-\frac{\sum^{S_1}_{i=1}\tau_g(x_{g_i}-\nu_g-\Delta_g)^2+\sum^{S_2}_{i=1}\tau_g(y_{g_{i}}-\nu_g+\Delta_g)^2+\rho(\nu_g-\mu)^2+r(\Delta_g-m)^2}{2}-\frac{\alpha\tau_g}{\beta}\} \end{align*}\]

\[\nu_g|ALL\sim N(\frac{\tau_g(\sum_{i=1}^{S_1}x_{g_i}+\sum_{i=i}^{S_2}y_{g_i}+(S_2-S_1)\Delta_g)+\rho\mu}{\tau_g(S_1+S_2)+\rho},\frac{1}{\tau_g(S_1+S_2)+\rho})\]

\[\Delta_g|ALL\sim N(\frac{\tau_g(\sum^{S_1}_{i=1}x_{g_i}-\sum^{S_2}_{i=i}y_{g_i}+(S_2-S_1)\nu_g)+rm}{\tau_g(S_1+S_2)+r},\frac{1}{\tau_g(S_1+S_2)+r})\]

\[\tau_g|ALL \sim Gamma(\frac{S_1+S_2}{2}+\frac{\alpha^2}{\beta},\frac{\alpha}{\beta}+\frac{\sum^{S_1}_{i=1}(x_{g_i}-\nu_g-\Delta_g)^2+\sum^{S_2}_{i=1}(y_{g_{i}}-\nu_g+\Delta_g)^2}{2})\]

\[\mu|ALL \sim N(\frac{\sum_{g}\nu_g}{G},\frac{1}{G\rho})\]

\[m|ALL\sim N(\frac{\sum_{g}\Delta_g}{G},\frac{1}{Gr})\]

\[\rho|ALL\sim Gamma(\frac{G}{2},\frac{\sum_{g}(\nu_g-\mu)^2}{2})\]

\[r|ALL\sim Gamma(\frac{G}{2},\frac{\sum_{g}(\Delta_g-m)^2}{2})\]

\[p(\alpha|ALL) \propto (\frac{\frac{\alpha}{\beta}^{\frac{\alpha^2}{\beta}-1}}{\Gamma(\frac{\alpha^2}{\beta})})^G\prod_g\tau_g^{\frac{\alpha^2}{\beta}}e^{-\frac{\alpha\tau_g}{\beta}}\]

\[p(\beta|ALL) \propto (\frac{\frac{\alpha}{\beta}^{\frac{\alpha^2}{\beta}-1}}{\Gamma(\frac{\alpha^2}{\beta})})^G\prod_g\tau_g^{\frac{\alpha^2}{\beta}}e^{-\frac{\alpha\tau_g}{\beta}}\]

The graphical model is as follows.

library(DiagrammeR)

# Graphical model with hyper-priors: (mu, rho) -> Nu_g, (m, r) ->
# Delta_g, (alpha, beta) -> Tau_g; observed X_gi, Y_gi are boxes.
grViz("
digraph boxes_and_circles{

#add node statement
node[shape=box]
X_gi;Y_gi

node[shape=circle]
mu;rho;m;r;alpha;beta;Delta_g;Tau_g;Nu_g
#add edge statement
mu->Nu_g;rho->Nu_g;m->Delta_g;r->Delta_g;alpha->Tau_g;beta->Tau_g;
Delta_g->X_gi;Delta_g->Y_gi;Tau_g->X_gi;Tau_g->Y_gi;Nu_g->X_gi;Nu_g->Y_gi
}
      ")

5

x1<-read.table("x1.txt")
x2<-read.table("x2.txt")
y1<-read.table("y1.txt")
y2<-read.table("y2.txt")

#Question 5: hyper-parameters now have their own chains
#initialize the parameters
r=10 #number of genes (rows) to run per dataset
n=1000# number of Gibbs draws per chain

# Hyper-parameter chains: one column per dataset, row 1 = initial value.
u1=cbind(rep(0,n+1),rep(0,n+1))

u2=cbind(rep(0,n+1),rep(0,n+1))

t1=cbind(c(1/100,rep(0,n)),c(1/100,rep(0,n)))

t2=cbind(c(1/100,rep(0,n)),c(1/100,rep(0,n)))

# NOTE(review): alpha and beta start at 100 here, although the text sets
# alpha = beta = 1/100 -- confirm the intended initial values.
alpha=cbind(c(100,rep(0,n)),c(100,rep(0,n)))

beta=cbind(c(100,rep(0,n)),c(100,rep(0,n)))

# Per-gene chains: row 1 holds the initial state.
v=list(dataset1=matrix(0,n+1,r),dataset2=matrix(0,n+1,r))
v[[1]][1,]=rep(0,r)
v[[2]][1,]=rep(0,r)
delta=list(dataset1=matrix(0,n+1,r),dataset2=matrix(0,n+1,r))
delta[[1]][1,]=rep(0,r)
delta[[2]][1,]=rep(0,r)
t=list(dataset1=matrix(0,n+1,r),dataset2=matrix(0,n+1,r))
t[[1]][1,]=rep(1,r)
t[[2]][1,]=rep(1,r)

x<-list(x1=x1,x2=x2);
y<-list(y1=y1,y2=y2);
# Gibbs sampler with hyper-prior updates (Question 5).
# Fixes vs. the draft:
#  * sufficient statistics computed per gene i (the draft used row 1 of
#    x and y for every gene, and took y_length from x)
#  * the Gamma full conditional for tau uses SS/2, matching the derived
#    posterior rate
#  * the beta MH step compared against prob1 instead of prob2 and
#    assigned beta1 on acceptance, so beta could never move
for(j in 1:2){
  #two files to sample
  for(k in 2:(n+1)){
    # n samples to get; current shape/rate of the tau prior from the
    # mean(alpha)/variance(beta) parametrization:
    # shape = alpha^2/beta, rate = alpha/beta
    a=alpha[k-1,j]^2/beta[k-1,j]
    b=alpha[k-1,j]/beta[k-1,j]
    for(i in 1:r){
      #r genes to implement from each dataset
      x_sum=sum(x[[j]][i,])
      y_sum=sum(y[[j]][i,])
      x_length=length(x[[j]][i,])
      y_length=length(y[[j]][i,])

      # nu_g | rest ~ N(b1/a1, 1/a1)
      b1=t[[j]][k-1,i]*(x_sum-x_length*delta[[j]][k-1,i]+
            y_sum+y_length*delta[[j]][k-1,i])+t1[k-1,j]*u1[k-1,j]
      a1=t[[j]][k-1,i]*(x_length+y_length)+t1[k-1,j]
      v[[j]][k,i]=rnorm(1,b1/a1, 1/sqrt(a1))

      # Delta_g | rest ~ N(b2/a2, 1/a2)
      b2=t[[j]][k-1,i]*(x_sum - x_length*v[[j]][k,i]-y_sum
                  +y_length*v[[j]][k,i])+t2[k-1,j]*u2[k-1,j]
      a2=t[[j]][k-1,i]*(x_length+y_length)+t2[k-1,j]
      delta[[j]][k,i]=rnorm(1,b2/a2, 1/sqrt(a2))

      # tau_g | rest ~ Gamma(a + (S1+S2)/2, b + SS/2)
      ss=sum((x[[j]][i,]-v[[j]][k,i]-delta[[j]][k,i])^2)+
        sum((y[[j]][i,]-v[[j]][k,i]+delta[[j]][k,i])^2)
      t[[j]][k,i]=rgamma(1,shape=(x_length+y_length)/2+a,rate=b+ss/2)
    }
    # Conjugate updates for mu, m (normal) and rho, r (gamma).
    u1[k,j]=rnorm(1,sum(v[[j]][k,])/r,1/sqrt(t1[k-1,j]*r))

    u2[k,j]=rnorm(1,sum(delta[[j]][k,])/r,1/sqrt(t2[k-1,j]*r))

    t1[k,j]=rgamma(1,r/2,sum((v[[j]][k,]-u1[k,j])^2)/2)

    t2[k,j]=rgamma(1,r/2,sum((delta[[j]][k,]-u2[k,j])^2)/2)

    # Metropolis-Hastings update for alpha; multiplicative uniform
    # proposal, with log(alpha1/alpha2) as the Hastings correction.
    # NOTE(review): the Gamma log-density terms below use (a-1)*log(b)
    # and a*sum_lgt, matching the formula stated in the text; the
    # textbook Gamma density would use a*log(b) and (a-1)*sum_lgt --
    # worth confirming against the intended posterior.
    alpha1=alpha[k-1,j]
    alpha2=runif(1,1/2,2)*alpha1
    a3=alpha2^2/beta[k-1,j]
    b3=alpha2/beta[k-1,j]
    sum_lgt=sum(log(t[[j]][k,]))
    sum_t=sum(t[[j]][k,])

    prob1=r*((a3-1)*log(b3)-lgamma(a3))+
    a3*sum_lgt-b3*sum_t-r*((a-1)*log(b)-lgamma(a))-a*sum_lgt+b*sum_t
    prob1=min(0,prob1+log(alpha1/alpha2))
    alpha[k,j]=ifelse(log(runif(1))<=prob1,alpha2,alpha1)

    # Metropolis-Hastings update for beta (same proposal scheme).
    beta1=beta[k-1,j]
    beta2=runif(1,1/2,2)*beta1
    a4=alpha[k,j]^2/beta1
    b4=alpha[k,j]/beta1
    a5=alpha[k,j]^2/beta2
    b5=alpha[k,j]/beta2

    prob2=r*((a5-1)*log(b5)-lgamma(a5))+
    a5*sum_lgt-b5*sum_t-r*((a4-1)*log(b4)-lgamma(a4))-a4*sum_lgt+b4*sum_t
    prob2=min(0,prob2+log(beta1/beta2))           # draft tested prob1 here
    beta[k,j]=ifelse(log(runif(1))<=prob2,beta2,beta1)  # draft kept beta1 on accept
  }
}
 # Trace plots: per-gene chains plus the six hyper-parameter chains.
 # (The "Covergence" spelling is inside runtime title strings; untouched.)
 for(j in 1:2){
  for(i in 1:r){
    par(mfrow=c(3,1))
    plot(v[[j]][,i],type="l",xlab="Iteration",ylab="Value of Nu",
         main=paste("Covergence of Nu in Gene",i,"for",ifelse(j==1,"First Data","Second Data")))
    plot(t[[j]][,i],type="l",xlab="Iteration",ylab="Value of Tau",
         main=paste("Covergence of Tau in Gene",i,"for",ifelse(j==1,"First Data","Second Data")))
    plot(delta[[j]][,i],type="l",xlab="Iteration",ylab="Value of Delta",
         main=paste("Covergence of Delta in Gene",i,"for",ifelse(j==1,"First Data","Second Data")))
  }
  par(mfrow=c(3,2))
  plot(u1[,j],type="l",xlab="Iteration",ylab="Value of Mu",
       main=paste("Covergence of Mu","for",ifelse(j==1,"First Data","Second Data")))
  plot(u2[,j],type="l",xlab="Iteration",ylab="Value of M",
       main=paste("Covergence of M","for",ifelse(j==1,"First Data","Second Data")))
  plot(t1[,j],type="l",xlab="Iteration",ylab="Value of Rho",
       main=paste("Covergence of Rho","for",ifelse(j==1,"First Data","Second Data")))
  plot(t2[,j],type="l",xlab="Iteration",ylab="Value of R",
       main=paste("Covergence of R","for",ifelse(j==1,"First Data","Second Data")))
  plot(alpha[,j],type="l",xlab="Iteration",ylab="Value of Alpha",
       main=paste("Covergence of Alpha","for",ifelse(j==1,"First Data","Second Data")))
  plot(beta[,j],type="l",xlab="Iteration",ylab="Value of Beta",
       main=paste("Covergence of Beta","for",ifelse(j==1,"First Data","Second Data")))
  
}

According to these many plots, we conclude that the parameters have converged. Moreover, judging by \(r\), none of these genes is differentially expressed at any significance level.

5. Problem 5: Adaptive Rejection Sampling(Review)

If the \(pdf\) is not log-concave, the algorithm's success depends on luck: if we sample a point in a region where the density is not log-concave and add it to the abscissa set to form a new upper bound, the intersection points of the tangent lines become misordered. Thus, we should not apply this algorithm to non-log-concave distributions.